{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "048ceec8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pymongo\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "#import gzip\n",
    "import urllib.request\n",
    "from bson import json_util\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f76dea3a",
   "metadata": {},
   "source": [
    "### CSV to JSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6fd4b316",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('ECG_Rhythm_Lead_I.csv', <http.client.HTTPMessage at 0x7f44a4ad3580>)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the ECG CSV from Zenodo and save it in the working directory\n",
    "csv_url = \"https://zenodo.org/record/5711347/files/ECG_Rhythm_Lead_I.csv?download=1\"\n",
    "urllib.request.urlretrieve(csv_url, \"ECG_Rhythm_Lead_I.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3fe7609b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Local filename of the downloaded CSV; it is read into df and then deleted below\n",
    "csv_name = 'ECG_Rhythm_Lead_I.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "28fe382e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(21837, 1006)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the CSV, using its first column as the row index, and show (rows, columns)\n",
    "df = pd.read_csv(csv_name, index_col=0)\n",
    "\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f4599212",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete the downloaded CSV from the working directory; its contents are already in df\n",
    "os.remove(csv_name) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1a16b5ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ecg_id</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "      <th>rhythm_diag</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>...</th>\n",
       "      <th>990</th>\n",
       "      <th>991</th>\n",
       "      <th>992</th>\n",
       "      <th>993</th>\n",
       "      <th>994</th>\n",
       "      <th>995</th>\n",
       "      <th>996</th>\n",
       "      <th>997</th>\n",
       "      <th>998</th>\n",
       "      <th>999</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>63.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.119</td>\n",
       "      <td>-0.116</td>\n",
       "      <td>-0.120</td>\n",
       "      <td>-0.117</td>\n",
       "      <td>...</td>\n",
       "      <td>0.198</td>\n",
       "      <td>0.194</td>\n",
       "      <td>0.115</td>\n",
       "      <td>0.107</td>\n",
       "      <td>0.107</td>\n",
       "      <td>0.106</td>\n",
       "      <td>0.090</td>\n",
       "      <td>0.069</td>\n",
       "      <td>0.086</td>\n",
       "      <td>0.022</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.0</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>0.004</td>\n",
       "      <td>-0.020</td>\n",
       "      <td>-0.053</td>\n",
       "      <td>-0.056</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.035</td>\n",
       "      <td>-0.045</td>\n",
       "      <td>0.004</td>\n",
       "      <td>0.044</td>\n",
       "      <td>0.507</td>\n",
       "      <td>0.554</td>\n",
       "      <td>0.316</td>\n",
       "      <td>0.121</td>\n",
       "      <td>-0.326</td>\n",
       "      <td>-0.348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>37.0</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.029</td>\n",
       "      <td>-0.035</td>\n",
       "      <td>-0.054</td>\n",
       "      <td>-0.078</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.040</td>\n",
       "      <td>-0.051</td>\n",
       "      <td>-0.026</td>\n",
       "      <td>-0.032</td>\n",
       "      <td>-0.052</td>\n",
       "      <td>-0.039</td>\n",
       "      <td>-0.034</td>\n",
       "      <td>-0.029</td>\n",
       "      <td>-0.048</td>\n",
       "      <td>-0.049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>82.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.054</td>\n",
       "      <td>-0.053</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>-0.060</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.312</td>\n",
       "      <td>-0.511</td>\n",
       "      <td>-0.280</td>\n",
       "      <td>-0.076</td>\n",
       "      <td>-0.012</td>\n",
       "      <td>0.001</td>\n",
       "      <td>-0.003</td>\n",
       "      <td>0.026</td>\n",
       "      <td>0.026</td>\n",
       "      <td>0.028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.034</td>\n",
       "      <td>-0.038</td>\n",
       "      <td>-0.057</td>\n",
       "      <td>-0.066</td>\n",
       "      <td>...</td>\n",
       "      <td>0.005</td>\n",
       "      <td>0.001</td>\n",
       "      <td>0.003</td>\n",
       "      <td>0.013</td>\n",
       "      <td>0.018</td>\n",
       "      <td>-0.001</td>\n",
       "      <td>0.007</td>\n",
       "      <td>0.000</td>\n",
       "      <td>-0.003</td>\n",
       "      <td>-0.012</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 1006 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   ecg_id   age  sex  height  weight rhythm_diag      0      1      2      3  \\\n",
       "0       1  56.0    1     NaN    63.0          SR -0.119 -0.116 -0.120 -0.117   \n",
       "1       2  19.0    0     NaN    70.0       OTHER  0.004 -0.020 -0.053 -0.056   \n",
       "2       3  37.0    1     NaN    69.0          SR -0.029 -0.035 -0.054 -0.078   \n",
       "3       4  24.0    0     NaN    82.0          SR -0.054 -0.053 -0.063 -0.060   \n",
       "4       5  19.0    1     NaN    70.0          SR -0.034 -0.038 -0.057 -0.066   \n",
       "\n",
       "   ...    990    991    992    993    994    995    996    997    998    999  \n",
       "0  ...  0.198  0.194  0.115  0.107  0.107  0.106  0.090  0.069  0.086  0.022  \n",
       "1  ... -0.035 -0.045  0.004  0.044  0.507  0.554  0.316  0.121 -0.326 -0.348  \n",
       "2  ... -0.040 -0.051 -0.026 -0.032 -0.052 -0.039 -0.034 -0.029 -0.048 -0.049  \n",
       "3  ... -0.312 -0.511 -0.280 -0.076 -0.012  0.001 -0.003  0.026  0.026  0.028  \n",
       "4  ...  0.005  0.001  0.003  0.013  0.018 -0.001  0.007  0.000 -0.003 -0.012  \n",
       "\n",
       "[5 rows x 1006 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the raw data\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c2651715",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['age', 'height', 'weight']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the columns that contain at least one missing value\n",
    "has_nan = df.isnull().any()\n",
    "nan_cols = has_nan[has_nan].index.tolist()\n",
    "nan_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a50381b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The columns with NaNs (age, height, weight) only hold positive values,\n",
    "# so -1 is used as the missing-value marker\n",
    "df = df.fillna(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d25dd75f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ecg_id</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "      <th>rhythm_diag</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>...</th>\n",
       "      <th>990</th>\n",
       "      <th>991</th>\n",
       "      <th>992</th>\n",
       "      <th>993</th>\n",
       "      <th>994</th>\n",
       "      <th>995</th>\n",
       "      <th>996</th>\n",
       "      <th>997</th>\n",
       "      <th>998</th>\n",
       "      <th>999</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.119</td>\n",
       "      <td>-0.116</td>\n",
       "      <td>-0.120</td>\n",
       "      <td>-0.117</td>\n",
       "      <td>...</td>\n",
       "      <td>0.198</td>\n",
       "      <td>0.194</td>\n",
       "      <td>0.115</td>\n",
       "      <td>0.107</td>\n",
       "      <td>0.107</td>\n",
       "      <td>0.106</td>\n",
       "      <td>0.090</td>\n",
       "      <td>0.069</td>\n",
       "      <td>0.086</td>\n",
       "      <td>0.022</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>0.004</td>\n",
       "      <td>-0.020</td>\n",
       "      <td>-0.053</td>\n",
       "      <td>-0.056</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.035</td>\n",
       "      <td>-0.045</td>\n",
       "      <td>0.004</td>\n",
       "      <td>0.044</td>\n",
       "      <td>0.507</td>\n",
       "      <td>0.554</td>\n",
       "      <td>0.316</td>\n",
       "      <td>0.121</td>\n",
       "      <td>-0.326</td>\n",
       "      <td>-0.348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>37.0</td>\n",
       "      <td>1</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.029</td>\n",
       "      <td>-0.035</td>\n",
       "      <td>-0.054</td>\n",
       "      <td>-0.078</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.040</td>\n",
       "      <td>-0.051</td>\n",
       "      <td>-0.026</td>\n",
       "      <td>-0.032</td>\n",
       "      <td>-0.052</td>\n",
       "      <td>-0.039</td>\n",
       "      <td>-0.034</td>\n",
       "      <td>-0.029</td>\n",
       "      <td>-0.048</td>\n",
       "      <td>-0.049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.054</td>\n",
       "      <td>-0.053</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>-0.060</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.312</td>\n",
       "      <td>-0.511</td>\n",
       "      <td>-0.280</td>\n",
       "      <td>-0.076</td>\n",
       "      <td>-0.012</td>\n",
       "      <td>0.001</td>\n",
       "      <td>-0.003</td>\n",
       "      <td>0.026</td>\n",
       "      <td>0.026</td>\n",
       "      <td>0.028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>SR</td>\n",
       "      <td>-0.034</td>\n",
       "      <td>-0.038</td>\n",
       "      <td>-0.057</td>\n",
       "      <td>-0.066</td>\n",
       "      <td>...</td>\n",
       "      <td>0.005</td>\n",
       "      <td>0.001</td>\n",
       "      <td>0.003</td>\n",
       "      <td>0.013</td>\n",
       "      <td>0.018</td>\n",
       "      <td>-0.001</td>\n",
       "      <td>0.007</td>\n",
       "      <td>0.000</td>\n",
       "      <td>-0.003</td>\n",
       "      <td>-0.012</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 1006 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   ecg_id   age  sex  height  weight rhythm_diag      0      1      2      3  \\\n",
       "0       1  56.0    1    -1.0    63.0          SR -0.119 -0.116 -0.120 -0.117   \n",
       "1       2  19.0    0    -1.0    70.0       OTHER  0.004 -0.020 -0.053 -0.056   \n",
       "2       3  37.0    1    -1.0    69.0          SR -0.029 -0.035 -0.054 -0.078   \n",
       "3       4  24.0    0    -1.0    82.0          SR -0.054 -0.053 -0.063 -0.060   \n",
       "4       5  19.0    1    -1.0    70.0          SR -0.034 -0.038 -0.057 -0.066   \n",
       "\n",
       "   ...    990    991    992    993    994    995    996    997    998    999  \n",
       "0  ...  0.198  0.194  0.115  0.107  0.107  0.106  0.090  0.069  0.086  0.022  \n",
       "1  ... -0.035 -0.045  0.004  0.044  0.507  0.554  0.316  0.121 -0.326 -0.348  \n",
       "2  ... -0.040 -0.051 -0.026 -0.032 -0.052 -0.039 -0.034 -0.029 -0.048 -0.049  \n",
       "3  ... -0.312 -0.511 -0.280 -0.076 -0.012  0.001 -0.003  0.026  0.026  0.028  \n",
       "4  ...  0.005  0.001  0.003  0.013  0.018 -0.001  0.007  0.000 -0.003 -0.012  \n",
       "\n",
       "[5 rows x 1006 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview again to check that the NaNs were replaced with -1\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d57a5123",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory the generated JSON files are written to (relative to the working directory)\n",
    "jsons_path = '../init-mongodb/data/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7612dfc0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# create a dict (to be document) per row and add to a json file\n",
    "## split into two json files due to git 100mb limit\n",
    "\n",
    "def write_documents(frame, path):\n",
    "    \"\"\"Write one JSON document per row of `frame` to `path`, one per line.\n",
    "\n",
    "    Each document has the keys ecg_id, age, sex, height, rhythm_diag and\n",
    "    time-series (the values of columns '0'..'999' as a list of floats).\n",
    "    \"\"\"\n",
    "    # Pull the 1000 signal columns out once as a float array; looking up\n",
    "    # every value with df.loc (1000 scalar lookups per row) is very slow.\n",
    "    signals = frame[[str(k) for k in range(1000)]].to_numpy(dtype=float)\n",
    "    fields = zip(frame[\"ecg_id\"], frame[\"age\"], frame[\"sex\"],\n",
    "                 frame[\"height\"], frame[\"rhythm_diag\"], signals)\n",
    "    with open(path, \"w\") as f:\n",
    "        for ecg_id, age, sex, height, rhythm_diag, signal in fields:\n",
    "            # NOTE(review): the weight column is not exported -- confirm this is intended\n",
    "            row_dict = {\n",
    "                \"ecg_id\": int(ecg_id),\n",
    "                \"age\": float(age),\n",
    "                \"sex\": float(sex),\n",
    "                \"height\": float(height),\n",
    "                \"rhythm_diag\": rhythm_diag,\n",
    "                \"time-series\": signal.tolist()}\n",
    "            f.write(json_util.dumps(row_dict))\n",
    "            f.write('\\n')\n",
    "\n",
    "\n",
    "# N was never assigned anywhere in this notebook, so a fresh kernel raised\n",
    "# NameError here. Presumably it is the row count (files split at N//2) -- confirm.\n",
    "N = len(df)\n",
    "os.makedirs(jsons_path, exist_ok=True)\n",
    "write_documents(df.iloc[:N//2], os.path.join(jsons_path, '1.json'))\n",
    "write_documents(df.iloc[N//2:], os.path.join(jsons_path, '2.json'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27573f93",
   "metadata": {},
   "outputs": [],
   "source": [
    "# f_in = open(json_path)\n",
    "# f_out = gzip.open(f'{json_path}.gz', 'wt')\n",
    "# f_out.writelines(f_in)\n",
    "# f_out.close()\n",
    "# f_in.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "67af8259",
   "metadata": {},
   "outputs": [],
   "source": [
    "# create sh script\n",
    "\n",
    "# Lines of the init script, in order; each is written followed by a newline\n",
    "script_lines = [\n",
    "    '#!/bin/bash',\n",
    "    'echo \"########### Loading data to Mongo DB ###########\"',\n",
    "    # 'gunzip /tmp/data/data.json.gz',  (disabled)\n",
    "    'mongoimport --db ecg_db --collection patients --file /tmp/data/1.json',\n",
    "    'mongoimport --db ecg_db --collection patients --file /tmp/data/2.json',\n",
    "]\n",
    "with open('../init-mongodb/init_db.sh', 'w') as f:\n",
    "    f.writelines(line + '\\n' for line in script_lines)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "91ff3e27",
   "metadata": {},
   "source": [
    "### Or import directly into MongoDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b8f2a5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#client = pymongo.MongoClient(\"localhost\", 2717)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9213f24d",
   "metadata": {},
   "outputs": [],
   "source": [
    "#client.list_database_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02f09133",
   "metadata": {},
   "outputs": [],
   "source": [
    "# New collection\n",
    "#ecg_db = client[\"ecg_data\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14feb652",
   "metadata": {},
   "outputs": [],
   "source": [
    "#mycol = ecg_db[\"patients\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "449421ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "#x = mycol.insert_many(row_dicts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80677721",
   "metadata": {},
   "outputs": [],
   "source": [
    "#ecg_db.list_collection_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddadd816",
   "metadata": {},
   "outputs": [],
   "source": [
    "#ecg_db[\"patients\"].find_one()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5084ec8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
